Filter Performance and Stability
Measure and compare performance of the Standard Filter vs. the Square Root Filter, CPU vs. GPU, and batched vs. unbatched execution.
KalmanFilterPerformance
KalmanFilterPerformance (n_obs=100, n_dim_obs=4, n_dim_state=3, n_dim_contr=3, bs=5, p_missing=0.3, init_method='random', use_sr_filter=True, device='cpu', use_conditional=True, use_batch=True, **kwargs)
Initialize self. See help(type(self)) for accurate signature.
kf = KalmanFilterPerformance(p_missing=0)
kf.time_method('filter')
0.14900339799987705
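Other configurations can be timed the same way. As a small illustrative sketch (assuming a CUDA device is available; the parameter values here are arbitrary examples), one could benchmark the standard, non-square-root filter with a larger batch:
# hypothetical example: time the standard filter on the GPU with a larger batch
kf_gpu = KalmanFilterPerformance(bs=32, use_sr_filter=False, device='cuda')
kf_gpu.time_method('filter')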
product_dict
product_dict (**kwargs)
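product_dict expands keyword arguments whose values are iterables into one dict per combination (see the example further down). A minimal sketch of such a helper, not necessarily the library's exact implementation:
from itertools import product

def product_dict_sketch(**kwargs):
    # yield one dict per combination of the iterable-valued keyword arguments
    keys = kwargs.keys()
    for values in product(*kwargs.values()):
        yield dict(zip(keys, values))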
perf_comb_params
perf_comb_params (method, n_obs=100, n_dim_obs=4, n_dim_state=3, n_dim_contr=3, bs=5, p_missing=0.3, init_method='random', use_sr_filter=True, device='cpu', use_conditional=True, use_batch=True)
perf_comb_params('filter')
shape: (1, 10)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
5 | "cpu" | 3 | 4 | 3 | 100 | 0.254086 | true | true | true |
Square Root Filter vs. Standard Filter
perf1 = perf_comb_params('filter', use_sr_filter=[True, False], rep=range(2))
perf1
shape: (200, 11)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | rep | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
5 | "cpu" | 3 | 4 | 3 | 100 | 0 | 0.274235 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 1 | 0.265272 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 2 | 0.261474 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 3 | 0.257838 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 4 | 0.262304 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 5 | 0.269207 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 6 | 0.252871 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 7 | 0.262902 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 8 | 0.320611 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 9 | 0.359268 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 10 | 0.351303 | true | true | true |
5 | "cpu" | 3 | 4 | 3 | 100 | 11 | 0.349618 | true | true | true |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5 | "cpu" | 3 | 4 | 3 | 100 | 88 | 0.251293 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 89 | 0.258562 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 90 | 0.249729 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 91 | 0.25231 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 92 | 0.254119 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 93 | 0.2571 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 94 | 0.252648 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 95 | 0.251427 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 96 | 0.249157 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 97 | 0.252483 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 98 | 0.250241 | true | true | false |
5 | "cpu" | 3 | 4 | 3 | 100 | 99 | 0.252376 | true | true | false |
perf1.groupby('use_sr_filter').agg(pl.col("time").mean()).with_column(
    pl.when(pl.col("use_sr_filter"))
      .then(pl.lit("Square Root Filter"))
      .otherwise(pl.lit("Standard Filter"))
      .alias("Filter type")
)
shape: (2, 3)
use_sr_filter | time | Filter type |
---|---|---|
bool | f64 | str |
true | 0.268333 | "Square Root Fi... |
false | 0.250166 | "Standard Filte... |
perf1 = perf1.with_column(
    pl.when(pl.col("use_sr_filter"))
      .then(pl.lit("Square Root Filter"))
      .otherwise(pl.lit("Standard Filter"))
      .alias("Filter type")
)
plot_perf_sr = alt.Chart(perf1.to_pandas()).mark_boxplot(size=50).encode(
    x=alt.X('Filter type', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('time', scale=alt.Scale(zero=False), title="time [s]"),
    color=alt.Color('Filter type',
                    scale=alt.Scale(scheme='accent'))
).properties(width=300)
plot_perf_sr
CPU vs GPU
gpu_best = perf_comb_params('filter', bs=1, n_obs=5, n_dim_contr=5, n_dim_obs=5, n_dim_state=5,
                            device=['cpu', 'cuda'], use_sr_filter=[True, False], p_missing=0, rep=2, use_batch=[True, False])
gpu_best
shape: (160, 12)
bs | device | n_dim_contr | n_dim_obs | n_dim_state | n_obs | p_missing | rep | time | use_batch | use_conditional | use_sr_filter |
---|---|---|---|---|---|---|---|---|---|---|---|
i64 | str | i64 | i64 | i64 | i64 | i64 | i64 | f64 | bool | bool | bool |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 0 | 0.134151 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 1 | 0.097733 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 2 | 0.074773 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 3 | 0.07477 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 4 | 0.092972 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 5 | 0.112495 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 6 | 0.106605 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 7 | 0.103665 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 8 | 0.094165 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 9 | 0.105327 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 10 | 0.096838 | true | true | true |
100 | "cpu" | 5 | 5 | 5 | 50 | 0 | 11 | 0.080813 | true | true | true |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 8 | 8.77501 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 9 | 10.170727 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 10 | 11.289223 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 11 | 11.012791 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 12 | 9.845103 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 13 | 9.998321 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 14 | 10.513864 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 15 | 9.192036 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 16 | 10.006169 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 17 | 8.915112 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 18 | 8.832785 | false | true | false |
100 | "cuda" | 5 | 5 | 5 | 50 | 0 | 19 | 9.227368 | false | true | false |
gpu_best.groupby(['device', 'use_batch']).agg(pl.col("time").mean())
shape: (4, 3)
device | use_batch | time |
---|---|---|
str | bool | f64 |
"cuda" | false | 9.602944 |
"cpu" | false | 4.560856 |
"cuda" | true | 0.274758 |
"cpu" | true | 0.083738 |
kwargs = {'a': 1, 'b': (1,2)}
kwargs = {key: tuplify(arg) for key, arg in kwargs.items()}
list(product_dict(**kwargs))
[{'a': 1, 'b': 1}, {'a': 1, 'b': 2}]
method = kf.get_method('filter')
from timeit import timeit
timeit('method()', globals={'method': method}, number=10)
0.15532574900134932
Performance
def compare_performance(n_obs, n_dim_obs, n_dim_state, n_dim_contr, bs, dtype=torch.float64):
    # note: the benchmark calls (e.g. %timeit) that produce the timings printed below are not shown here
    kf_cuda = KalmanFilter.init_random(n_dim_obs, n_dim_state, dtype=dtype).cuda()
    data_cuda, mask_cuda = get_test_data(n_dim_obs, n_dim_state, bs=bs, device="cuda", dtype=dtype)
    print("GPU")
    kf_cuda = KalmanFilter.init_random(n_dim_obs, n_dim_state, dtype=dtype)
    data_cuda, mask_cuda = get_test_data(n_dim_obs, n_dim_state, bs=bs, dtype=dtype)
    print("CPU")
    print("No batches CPU")
    print("No batches GPU")
compare_performance(100, 2, 2, 100)
GPU
87.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
12.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
154 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10, 10, 200)
GPU
2.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Float64
compare_performance(100, 2, 2, 100, dtype=torch.float64)
GPU
100 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.29 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
159 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10, 10, 200, dtype=torch.float64)
GPU
2.22 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.35 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.01 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
Stability
# regularize the raw covariance parameters of the square root filter: add a small
# diagonal jitter (sqrt(1e-5)) to Q_raw, R_raw and P0_raw to keep them well-conditioned
kSR.Q_raw = torch.nn.Parameter(kSR.Q_raw + eye_like(kSR.Q_raw) * torch.sqrt(torch.tensor(1e-5)))
kSR.R_raw = torch.nn.Parameter(kSR.R_raw + eye_like(kSR.R_raw) * torch.sqrt(torch.tensor(1e-5)))
kSR.P0_raw = torch.nn.Parameter(kSR.P0_raw + eye_like(kSR.P0_raw) * torch.sqrt(torch.tensor(1e-5)))
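eye_like comes from the library; as a stand-in (assuming it returns an identity matrix matching its argument's size, dtype and device), something like the following lets the snippet above run on its own:
import torch

def eye_like(t: torch.Tensor) -> torch.Tensor:
    # identity matrix with the same trailing dimension, dtype and device as t
    return torch.eye(t.shape[-1], dtype=t.dtype, device=t.device)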
fuzz_filter_SR
fuzz_filter_SR (n_iter=10, n_obs=50)
Compare the standard and the square root filter on randomly generated data; the returned frame holds, for each time step t, the mean absolute error (MAE) between the state covariances produced by the two implementations.
err_raw = fuzz_filter_SR(10, 120)
err = err_raw.groupby('t').agg([
    pl.col('MAE').median().alias("median"),
    pl.col('MAE').quantile(.75).alias("Q3"),
    pl.col('MAE').quantile(.25).alias("Q1"),
    pl.col('MAE').max().alias("max")
])
median = alt.Chart(err.to_pandas()).mark_line(color="black").encode(
    x=alt.X('t', title="Number of Iterations"),
    y=alt.Y('median', axis=alt.Axis(format=".1e"), scale=alt.Scale(type="log"), title="log MAE"),
    strokeDash=datum("median")
)
Q1 = alt.Chart(err.to_pandas()).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q1', strokeDash=datum("quantile"))
Q3 = alt.Chart(err.to_pandas()).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q3', strokeDash=datum("quantile"))
max = alt.Chart(err.to_pandas()).mark_line(color='black', strokeDash=[2,2]).encode(x='t', y='max', strokeDash=datum("max"))
p = (Q1 + Q3 + max + median).interactive().properties(title="Standard Filter vs Square Root Filter (Mean Absolute Error of state covariances)")
p
plot_err_sr_filter
plot_err_sr_filter (err_raw)
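Calling the convenience wrapper on the raw fuzzing results should produce the same layered chart as the manual construction above:
# presumably equivalent to the manual chart construction above
plot_err_sr_filter(err_raw)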